- Creado por Hadley Wickham
- Es un set de paquetes de R que evolucionan constantemente.
- El objetivo es permitir una manera de trabajar más “limpia” (“tidy”).
- Trabaja siguiendo los principios de “tidy data”
library(palmerpenguins) # example data set
# Show the first rows of the penguins data.
head(penguins)
## # A tibble: 6 x 8 ## species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex ## <fct> <fct> <dbl> <dbl> <int> <int> <fct> ## 1 Adelie Torge~ 39.1 18.7 181 3750 male ## 2 Adelie Torge~ 39.5 17.4 186 3800 fema~ ## 3 Adelie Torge~ 40.3 18 195 3250 fema~ ## 4 Adelie Torge~ NA NA NA NA <NA> ## 5 Adelie Torge~ 36.7 19.3 193 3450 fema~ ## 6 Adelie Torge~ 39.3 20.6 190 3650 male ## # ... with 1 more variable: year <int>
Básicamente, un data frame con más información.
Para aprender más sobre por qué se creó “tidyverse”, lee el manifiesto del autor.
Happy families are all alike; every unhappy family is unhappy in its own way — Leo Tolstoy
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6 v purrr 0.3.4 ## v tibble 3.1.6 v dplyr 1.0.8 ## v tidyr 1.2.0 v stringr 1.4.0 ## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag()
# Mean of every numeric column, computed separately for each species.
palmerpenguins::penguins %>%
  group_by(species) %>% # choose the grouping of interest
  # An explicit function carries na.rm; passing extra arguments through
  # across()'s dots is deprecated in newer dplyr releases.
  summarize(across(where(is.numeric), function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 x 6 ## species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g year ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 Adelie 38.8 18.3 190. 3701. 2008. ## 2 Chinstrap 48.8 18.4 196. 3733. 2008. ## 3 Gentoo 47.5 15.0 217. 5076. 2008.
# Calculando la media por grupo para cada categoría
Ejemplos:
# Parse dates and date-times with lubridate. Each `##` line shows the printed
# result of the call directly above it.
library(lubridate, warn.conflicts = FALSE)
ymd(20101215)
## [1] "2010-12-15"
mdy("4/1/17")
## [1] "2017-04-01"
time <- ymd_hms("2010-12-13 15:30:30")
time
## [1] "2010-12-13 15:30:30 UTC"
# force_tz() keeps the clock time and replaces only the attached time zone.
force_tz(time, "America/Chicago")
## [1] "2010-12-13 15:30:30 CST"
# Build durations from a number plus unit, or from a free-form string. Each
# `##` line shows the printed result of the call directly above it.
library(lubridate, warn.conflicts = FALSE)
duration(1.5, "minutes")
## [1] "90s (~1.5 minutes)"
duration("2days 2hours 2mins 2secs")
## [1] "180122s (~2.08 days)"
# Durations can also be used with comparison operators.
duration("day 2 sec") > "day 1sec"
## [1] TRUE
Aquí podéis leer el capítulo sobre cómo trabajar con fechas y horas en R.
# Scatter plot of flipper length (x) against body mass (y). Species is encoded
# twice, through point colour and point shape, and both legends share a title.
mass_flipper <- ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(aes(color = species, shape = species), size = 3, alpha = 0.8) +
  scale_color_manual(values = c("darkorange", "purple", "cyan4")) +
  labs(
    title = "Penguin size, Palmer Station LTER",
    subtitle = "Flipper length and body mass for Adelie, Chinstrap and Gentoo Penguins",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    color = "Penguin species",
    shape = "Penguin species"
  ) +
  # The complete theme goes first so the tweaks below are not overwritten.
  theme_minimal() +
  theme(
    legend.position = c(0.2, 0.7),
    legend.background = element_rect(fill = "white", color = NA),
    plot.title.position = "plot",
    plot.caption = element_text(hjust = 0, face = "italic"),
    plot.caption.position = "plot"
  )

mass_flipper
## `geom_smooth()` using formula 'y ~ x'
# Scatter plot of bill length (x) against bill depth (y) with one straight
# line fit (method = "lm", no confidence band) per species.
bill_len_dep <- ggplot(data = penguins,
                       aes(x = bill_length_mm,
                           y = bill_depth_mm,
                           group = species)) +
  geom_point(aes(color = species,
                 shape = species),
             size = 3,
             alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, aes(color = species)) +
  # The chain must end on a complete layer: a trailing `+` would make the
  # parser continue into the next statement. Theme and palette match the
  # other species plots in this file.
  theme_minimal() +
  scale_color_manual(values = c("darkorange","purple","cyan4"))
bill_len_dep
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overlaid histograms of body mass, one per species. position = "identity"
# draws the bars on top of each other instead of stacking them, and alpha
# keeps the overlapping bars visible.
bmass_hist <- ggplot(data = penguins, aes(x = body_mass_g)) +
  geom_histogram(aes(fill = species),
                 alpha = 0.5,
                 position = "identity") +
  scale_fill_manual(values = c("darkorange","purple","cyan4")) +
  theme_minimal() +
  labs(x = "Body mass (g)",
       y = "Frequency",
       title = "Penguin body mass")
# Print the object that was just created (the name is bmass_hist).
bmass_hist
library(ggdist) # adds uncertainty visualizations to ggplot2
# Make theme_classic() the default theme for the plots that follow.
theme_set(theme_classic())
# Raincloud plot of the bill ratio (bill length / bill depth) per species.

# Step 1: one row per penguin with a non-missing ratio, plus per-species
# summary columns (n, median, max) that the text labels below rely on.
bill_ratios <- penguins %>%
  mutate(bill_ratio = bill_length_mm / bill_depth_mm) %>%
  filter(!is.na(bill_ratio)) %>%
  group_by(species) %>%
  mutate(n = n(), median = median(bill_ratio), max = max(bill_ratio)) %>%
  ungroup() %>%
  # Numeric y position; fct_rev() reverses the factor level order.
  mutate(species_num = as.numeric(fct_rev(species)))

# Step 2: palette shared by the colour and fill scales.
species_colors <- c("#3d6721", "#a86826", "#006c89")

# Step 3: assemble the layers (the order of layers is the drawing order).
ggplot(bill_ratios, aes(x = bill_ratio, y = species_num, color = species)) +
  # Dotted guide line running from the left edge up to each species' median.
  stat_summary(
    geom = "linerange",
    orientation = "y",
    fun.min = function(x) -Inf,
    fun.max = function(x) median(x, na.rm = TRUE),
    linetype = "dotted",
    size = 0.7
  ) +
  # "Rain": one tick mark per penguin, drawn slightly below the baseline.
  geom_point(aes(y = species_num - 0.15), shape = "|", size = 5, alpha = 0.33) +
  # "Cloud": half-eye density whose fill is a lightened version of the colour.
  ggdist::stat_halfeye(
    aes(
      y = species_num,
      color = species,
      fill = after_scale(colorspace::lighten(color, 0.5))
    ),
    shape = 18,
    point_size = 3,
    interval_size = 1.8,
    adjust = 0.5,
    .width = c(0, 1)
  ) +
  # Median value printed in white at the median position.
  geom_text(
    aes(x = median, label = format(round(median, 2), nsmall = 2)),
    stat = "unique",
    color = "white",
    family = "Open Sans",
    fontface = "bold",
    size = 3.4,
    nudge_y = 0.15
  ) +
  # Sample size printed just right of each species' maximum ratio.
  geom_text(
    aes(x = max, label = glue::glue("n = {n}")),
    stat = "unique",
    family = "Open Sans",
    fontface = "bold",
    size = 3.5,
    hjust = 0,
    nudge_x = 0.01,
    nudge_y = 0.02
  ) +
  scale_color_manual(values = species_colors, guide = "none") +
  scale_fill_manual(values = species_colors, guide = "none") +
  scale_x_continuous(
    limits = c(1.6, 3.8),
    breaks = seq(1.6, 3.8, by = 0.2)
  ) +
  scale_y_continuous(
    limits = c(0.55, NA),
    breaks = 1:3,
    labels = c("Gentoo", "Chinstrap", "Adélie")
  ) +
  # clip = "off" lets the labels extend past the panel edge.
  coord_cartesian(expand = FALSE, clip = "off") +
  labs(
    x = "Bill ratio",
    y = NULL,
    subtitle = "B. Raincloud plot showing the distribution of bill ratios, estimated as bill length divided by bill depth.",
    caption = "Data: Gorman, Williams & Fraser (2014) *PLoS ONE* • Illustration: Allison Horst"
  ) +
  theme(
    plot.title.position = "plot",
    plot.subtitle = element_text(margin = margin(t = 5, b = 10)),
    plot.margin = margin(10, 25, 10, 25),
    axis.text.y = element_text(size = 13),
    axis.ticks.length = unit(0, "lines"),
    panel.grid.major.x = element_line(size = 0.35),
    panel.grid.major.y = element_blank()
  )